/*
 * Copyright (c) 2020 Trail of Bits, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsign-conversion"
#pragma clang diagnostic ignored "-Wconversion"
#pragma clang diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wswitch-enum"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <llvm/IR/Module.h>
#pragma clang diagnostic pop

#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>

// Auto-generated by cmake/protobuf inside the build directory.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#include <CFG.pb.h>
#pragma clang diagnostic pop

#include <anvill/Decl.h>
#include <anvill/TypeParser.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <remill/Arch/Arch.h>
#include <remill/BC/Compat/Error.h>
#include <remill/BC/Util.h>
#include <remill/OS/OS.h>

#include "mcsema/Arch/Arch.h"
#include "mcsema/BC/External.h"
#include "mcsema/BC/Util.h"
#include "mcsema/CFG/CFG.h"

// Boolean flags that are only declared here (gflags `DECLARE_bool` makes a
// flag that is defined in another translation unit visible in this file).
DECLARE_bool(explicit_args);
DECLARE_bool(merge_segments);

// Boolean command-line flag defined by this file; it defaults to `false`.
// The help string below explains the adjacent-segment merging that setting
// the flag turns off.
DEFINE_bool(
    disable_adjacent_segment_merging, false,
    "Should we disable merging of adjacent segments? Adjacent segment "
    "merging is performed in order to improve the reliability of cross-"
    "reference lifting and comparisons, e.g. if a pointer comparison is "
    "used to test if a loop should terminate, then sometimes that "
    "pointer might be one element past the end of a segment.");

namespace mcsema {
namespace {

// Produce an identifier-friendly copy of `name`: every character for which
// `isalnum` is false is replaced by an underscore; all other characters are
// kept as-is. The result always has the same length as `name`.
static std::string SaneName(const std::string &name) {
  std::string sane;
  sane.reserve(name.size());
  for (const char c : name) {

    // The <cctype> classifiers have undefined behavior when given a value
    // that is neither EOF nor representable as `unsigned char`. Plain `char`
    // may be signed, so bytes >= 0x80 (e.g. UTF-8 in symbol names) must be
    // converted before classification.
    const auto as_uchar = static_cast<unsigned char>(c);
    sane.push_back(std::isalnum(as_uchar) ? c : '_');
  }
  return sane;
}

// Build the name used for the lifted form of `cfg_func`: `sub_<ea in hex>`,
// followed by `_<sanitized name>` when the CFG function carries a name.
static std::string LiftedFunctionName(const Function &cfg_func) {
  std::ostringstream lifted;
  lifted << "sub_" << std::hex << cfg_func.ea();

  // Unnamed functions are identified by address alone.
  if (!cfg_func.has_name()) {
    return lifted.str();
  }

  lifted << "_" << SaneName(cfg_func.name());
  return lifted.str();
}

static std::string LiftedSegmentName(const Segment &cfg_segment) {
  std::stringstream ss;
  if (cfg_segment.has_variable_name()) {
    auto has_name = !cfg_segment.variable_name().empty();
    LOG_IF(ERROR, !has_name)
        << "CFG variable segment " << cfg_segment.name() << " at " << std::hex
        << cfg_segment.ea() << std::dec << " has an empty name.";

    if (has_name && cfg_segment.is_exported()) {
      ss << cfg_segment.variable_name();
    } else {
      ss << "seg_var_" << std::hex << cfg_segment.ea() << "_"
         << SaneName(cfg_segment.variable_name()) << "_"
         << cfg_segment.data().size();
    }
  } else {
    ss << "seg_" << std::hex << cfg_segment.ea() << "_"
       << SaneName(cfg_segment.name()) << "_" << cfg_segment.data().size();
  }
  return ss.str();
}

// Build the name used for the lifted form of `cfg_var`: `data_<ea in hex>`,
// followed by `_<sanitized name>` when the CFG variable carries a name.
static std::string LiftedVarName(const Variable &cfg_var) {
  std::ostringstream lifted;
  lifted << "data_" << std::hex << cfg_var.ea();

  // Unnamed variables are identified by address alone.
  if (!cfg_var.has_name()) {
    return lifted.str();
  }

  lifted << "_" << SaneName(cfg_var.name());
  return lifted.str();
}

// Build the lifted name of an external function:
// `ext_<ea in hex>_<sanitized name>`. The name part is always appended, even
// if the CFG entry's name is empty.
static std::string ExternalFuncName(const ExternalFunction &cfg_func) {
  const std::string sanitized = SaneName(cfg_func.name());

  std::ostringstream lifted;
  lifted << "ext_" << std::hex << cfg_func.ea();
  lifted << "_" << sanitized;
  return lifted.str();
}

// Look up the segment containing the data at `ea`. This is a thin wrapper
// that forwards the query to `NativeModule::TryGetSegment` and returns
// whatever that lookup yields.
//
// TODO(pag): Re-implement with a call to `lower_bound` or `upper_bound`.
static const NativeSegment *FindSegment(const NativeModule *module,
                                        uint64_t ea) {
  const NativeSegment *containing_segment = module->TryGetSegment(ea);
  return containing_segment;
}

// Resolve the target of `xref` to a known program entity.
//
// On return `xref->var` holds the result of the variable lookup for
// `xref->target_ea`. `xref->func` holds the result of the function lookup,
// which is only attempted when no variable was found (otherwise it is null).
//
// Returns `true` if the target is a variable or a function, or if it at least
// lies in a known segment (`xref->target_segment` is non-null; a warning is
// logged in that case). Returns `false` when nothing matches.
static bool ResolveReference(NativeModule *module, NativeXref *xref) {
  xref->func = nullptr;
  xref->var = module->TryGetVariable(xref->target_ea);

  // Variables take priority; only consult the functions if that failed.
  if (!xref->var) {
    xref->func = module->TryGetFunction(xref->target_ea);
  }

  if (xref->var || xref->func) {
    return true;
  }

  if (!xref->target_segment) {
    return false;
  }

  // The target is inside a segment, but nothing is known to live there.
  LOG(WARNING) << "Data cross reference at " << std::hex << xref->ea
               << " in segment " << xref->segment->name << " targeting "
               << xref->target_ea << " in segment "
               << xref->target_segment->name
               << " is not associated with a function or variable";
  return true;
}

// Take the `CodeReference` information from the CFG, copy it into `xref`,
// and attach `xref` to the instruction `inst`.
//
// `xref->mask` is taken from the CFG reference when it has one, and is zero
// otherwise; `xref->target_ea` is the referenced address. The operand type of
// the reference selects which slot of `inst` (`imm`, `mem`, `disp`, `flow` or
// `offset_table`) is pointed at `xref`. If the chosen slot was already
// occupied a warning is logged and the old value is replaced. An operand
// type that matches none of the cases leaves `inst` untouched.
//
// `module` and `pointer_size` are currently unused, and the function always
// returns `true`.
static bool FillXref(NativeModule *module, NativeInstruction *inst,
                     const CodeReference &cfg_ref, uint64_t pointer_size,
                     NativeInstructionXref *xref) {
  xref->mask = cfg_ref.has_mask() ? static_cast<uint64_t>(cfg_ref.mask())
                                  : static_cast<uint64_t>(0);
  xref->target_ea = static_cast<uint64_t>(cfg_ref.ea());

  // Point `slot` at `xref`, warning first if that discards an earlier xref.
  const auto install = [inst, xref](auto &slot, const char *overwrite_msg) {
    LOG_IF(WARNING, slot != nullptr)
        << overwrite_msg << std::hex << inst->ea << std::dec;
    slot = xref;
  };

  switch (cfg_ref.operand_type()) {
    case CodeReference_OperandType_ImmediateOperand:
      install(inst->imm,
              "Overwriting existing immediate reference at instruction ");
      break;

    case CodeReference_OperandType_MemoryOperand:
      install(inst->mem,
              "Overwriting existing absolute reference at instruction ");
      break;

    case CodeReference_OperandType_MemoryDisplacementOperand:
      install(inst->disp,
              "Overwriting existing displacement reference at instruction ");
      break;

    case CodeReference_OperandType_ControlFlowOperand:
      install(inst->flow,
              "Overwriting existing flow reference at instruction ");
      break;

    case CodeReference_OperandType_OffsetTable:
      install(inst->offset_table,
              "Overwriting existing offset table reference at instruction ");
      break;
  }

  return true;
}

}  // namespace

// Create an object owned by `module_`. A new object forwards to itself
// (`forward == this`), which `Get` and `ForwardTo` treat as "not forwarded".
NativeObject::NativeObject(NativeModule *module_)
    : module(module_),
      forward(this) {}

// Create an external function belonging to `module_`. Its calling convention
// starts out as the default one reported by the global architecture `gArch`.
NativeExternalFunction::NativeExternalFunction(NativeModule *module_)
    : NativeFunction(module_),
      cc(gArch->DefaultCallingConv()) {}

// Member-wise constructor for a segment entry: stores the entry's address
// (`o_ea`), the address following it (`o_next_ea`), and the cross-reference
// and blob pointers exactly as given.
NativeSegment::Entry::Entry(uint64_t o_ea, uint64_t o_next_ea,
                            NativeXref *o_xref, NativeBlob *o_blob)
    : ea(o_ea),
      next_ea(o_next_ea),
      xref(o_xref),
      blob(o_blob) {}

// Redirect this object to `dest`.
//
// An object that is not yet forwarded (`forward == this`) is pointed at the
// object that `dest` itself resolves to. An object that already forwards
// elsewhere first redirects its current target to `dest`, and is then pointed
// directly at `dest`.
void NativeObject::ForwardTo(NativeObject *dest) const {
  if (forward == this) {
    forward = dest->Get();
    return;
  }

  // Propagate the redirection down the existing chain before re-pointing.
  forward->ForwardTo(dest);
  forward = dest;
}

// Resolve this object to the one it ultimately forwards to (itself when it
// is not forwarded). The resolved target is cached in `forward`, so later
// lookups skip the intermediate objects.
const NativeObject *NativeObject::Get(void) const {
  const bool is_forwarded = forward != this;
  if (is_forwarded) {
    NativeObject *const resolved = forward->Get();
    forward = resolved;
  }
  return forward;
}

// Non-const counterpart of the lookup above: resolve this object to the one
// it ultimately forwards to (itself when it is not forwarded), caching the
// resolved target in `forward`.
NativeObject *NativeObject::Get(void) {
  const bool is_forwarded = forward != this;
  if (is_forwarded) {
    NativeObject *const resolved = forward->Get();
    forward = resolved;
  }
  return forward;
}

// `NativeObject`'s own implementation of `Pointer`: calling it is treated as
// a usage error and reported through `LOG(FATAL)`.
// NOTE(review): presumably meant to be overridden by object kinds that have
// a pointer representation -- confirm against the class declarations.
llvm::Constant *NativeObject::Pointer(void) const {
  LOG(FATAL) << "Invalid use.";
  return nullptr;  // Gives the function a return value after the fatal log.
}

// `NativeObject`'s own implementation of `Address`: calling it is treated as
// a usage error and reported through `LOG(FATAL)`.
// NOTE(review): presumably meant to be overridden by object kinds that have
// an address representation -- confirm against the class declarations.
llvm::Constant *NativeObject::Address(void) const {
  LOG(FATAL) << "Invalid use.";
  return nullptr;  // Gives the function a return value after the fatal log.
}

// Report whether this function is known not to return. That is the case when
// its recovered declaration (if any) is marked `is_noreturn`, or when its
// associated LLVM function (if any) carries the `NoReturn` attribute.
bool NativeFunction::IsNoReturn(void) const {
  const bool decl_says_noreturn = decl && decl->is_noreturn;
  return decl_says_noreturn ||
         (function && function->hasFnAttribute(llvm::Attribute::NoReturn));
}

// Fold the export and thread-local status of `old_var` into `var`: each flag
// of `var` ends up set if it was set on either variable.
static void MergeVariables(NativeVariable *var, const NativeVariable *old_var) {
  const bool exported = old_var->is_exported || var->is_exported;
  const bool thread_local_var =
      old_var->is_thread_local || var->is_thread_local;

  var->is_exported = exported;
  var->is_thread_local = thread_local_var;
}

// Fold the export and thread-local status of the segment `seg` into `var`:
// each flag of `var` ends up set if it was set on either of them. An error is
// logged when the variable is thread-local but `seg` is not.
static void UpdateVariable(NativeVariable *var, const NativeSegment *seg) {
  const bool exported = seg->is_exported || var->is_exported;
  const bool thread_local_var = seg->is_thread_local || var->is_thread_local;

  var->is_exported = exported;
  var->is_thread_local = thread_local_var;

  // Only reachable when the variable itself was already thread-local.
  const bool tls_mismatch = thread_local_var && !seg->is_thread_local;
  LOG_IF(ERROR, tls_mismatch)
      << "Variable " << var->name << " at " << std::hex << var->ea
      << " marked as thread-local, but in non-thread-local segment "
      << seg->name << " at " << seg->ea << std::dec;
}

bool RecoverValueDecl(const NativeFunction *func, anvill::ValueDecl &decl,
                      const ValueDecl &cfg_decl) {
  auto maybe_type = anvill::ParseType(*gContext, cfg_decl.type());
  if (remill::IsError(maybe_type)) {
    LOG(ERROR) << "Could not parse type in value spec of function "
               << func->name << ": " << remill::GetErrorString(maybe_type);
    return false;
  }

  decl.type = *maybe_type;
  if (cfg_decl.has_memory()) {
    decl.mem_reg = gArch->RegisterByName(cfg_decl.memory().register_());
    if (!decl.mem_reg) {
      LOG(ERROR) << "Bad register name '" << cfg_decl.memory().register_()
                 << "' in function spec for " << func->name << " at "
                 << std::hex << func->ea << std::dec;
      return false;
    }
    decl.mem_offset = cfg_decl.memory().offset();
    return true;

  } else if (cfg_decl.has_register_()) {
    decl.reg = gArch->RegisterByName(cfg_decl.register_());
    if (!decl.reg) {
      LOG(ERROR) << "Bad register name '" << cfg_decl.register_()
                 << "' in function spec for " << func->name << " at "
                 << std::hex << func->ea << std::dec;
      return false;
    }
    return true;
  } else {
    return false;
  }
}

// Append a new parameter to `decl.params` and fill it in from `cfg_decl`.
// The parameter's name is copied when the CFG spec has one. Returns the
// result of `RecoverValueDecl`; the new parameter stays in `decl.params`
// even when that fails.
bool RecoverParamDecl(const NativeFunction *func, anvill::FunctionDecl &decl,
                      const ValueDecl &cfg_decl) {
  auto &params = decl.params;
  params.emplace_back();
  auto &new_param = params.back();

  if (cfg_decl.has_name()) {
    new_param.name = cfg_decl.name();
  }

  const bool recovered = RecoverValueDecl(func, new_param, cfg_decl);
  return recovered;
}

// Append a new return value to `decl.returns` and fill it in from
// `cfg_decl`. Returns the result of `RecoverValueDecl`; the new entry stays
// in `decl.returns` even when that fails.
bool RecoverRetDecl(const NativeFunction *func, anvill::FunctionDecl &decl,
                    const ValueDecl &cfg_decl) {
  auto &returns = decl.returns;
  returns.emplace_back();
  auto &new_return = returns.back();
  return RecoverValueDecl(func, new_return, cfg_decl);
}

// Recover an anvill `FunctionDecl` for `func` from the CFG spec `cfg_decl`,
// declare it in `module`, and store the result in `func->decl`.
//
// `ll_func`, when given, is an existing LLVM function for `func`; it can only
// strengthen the no-return / variadic flags taken from the CFG.
//
// Any problem (missing or unknown return stack pointer, or a return value,
// parameter, or return address that cannot be recovered, or a failed
// declaration) is logged and leaves `func->decl` untouched.
void RecoverFunctionDecl(NativeModule *module, NativeFunction *func,
                         const FunctionDecl &cfg_decl,
                         llvm::Function *ll_func = nullptr) {
  anvill::FunctionDecl decl;
  decl.address = func->ea;
  decl.arch = gArch.get();
  decl.calling_convention =
      static_cast<llvm::CallingConv::ID>(cfg_decl.calling_convention());

  // Either source may mark the function as no-return or variadic.
  decl.is_noreturn =
      cfg_decl.is_noreturn() || (ll_func && ll_func->doesNotReturn());
  decl.is_variadic = cfg_decl.is_variadic() || (ll_func && ll_func->isVarArg());

  // The return stack pointer must exist and be located either in memory
  // (register + offset) or directly in a register.
  const auto &cfg_sp = cfg_decl.return_stack_pointer();
  const bool sp_in_memory = cfg_sp.has_memory();
  const bool sp_has_location = sp_in_memory || cfg_sp.has_register_();
  if (!cfg_decl.has_return_stack_pointer() || !sp_has_location) {
    LOG(ERROR) << "Function spec for " << func->name << " at " << std::hex
               << func->ea << std::dec << " is missing a return stack pointer";
    return;
  }

  const auto &sp_reg_name =
      sp_in_memory ? cfg_sp.memory().register_() : cfg_sp.register_();

  decl.return_stack_pointer = gArch->RegisterByName(sp_reg_name);
  if (sp_in_memory) {
    decl.return_stack_pointer_offset = cfg_sp.memory().offset();
  }

  if (!decl.return_stack_pointer) {
    LOG(ERROR) << "Function spec for " << func->name << " at " << std::hex
               << func->ea << std::dec
               << " has an invalid return stack pointer name '" << sp_reg_name
               << "'";
    return;
  }

  // Return values first, then parameters, then the return address. Each
  // helper logs its own diagnostics on failure.
  for (const auto &cfg_ret : cfg_decl.return_values()) {
    if (!RecoverRetDecl(func, decl, cfg_ret)) {
      return;
    }
  }

  for (const auto &cfg_param : cfg_decl.parameters()) {
    if (!RecoverParamDecl(func, decl, cfg_param)) {
      return;
    }
  }

  if (!RecoverValueDecl(func, decl.return_address, cfg_decl.return_address())) {
    return;
  }

  // Just in case IDA is wrong: a spec with no parameters at all is treated
  // as variadic.
  if (decl.params.empty()) {
    decl.is_variadic = true;
  }

  auto maybe_func = module->DeclareFunction(decl, true);
  if (remill::IsError(maybe_func)) {
    LOG(ERROR) << remill::GetErrorString(maybe_func);
    return;
  }

  func->decl = *maybe_func;
}

// A `NativeInstruction` that additionally embeds storage for `kNumXrefs`
// `NativeInstructionXref` objects.
// NOTE(review): presumably lets an instruction and the xrefs its slots point
// to be allocated as one object -- confirm at the use site.
template <size_t kNumXrefs>
struct NativeInstructionWithXrefs : public NativeInstruction {
 public:
  NativeInstructionXref xrefs[kNumXrefs];
};

// Convert the protobuf into an in-memory data structure. This does a fair
// amount of checking and tries to correct errors in favor of converting
// variables into functions, and internals into externals. The intuition is
// that, at least in ELF binaries, externals will usually have some kind of
// 'internal' location for the sake of linking, and so we want to dedup
// internals into externals whenever possible.
NativeModule *ReadProtoBuf(const std::string &file_name,
                           uint64_t pointer_size) {
  GOOGLE_PROTOBUF_VERIFY_VERSION;

  std::ifstream fstream(file_name, std::ios::binary);
  CHECK(fstream.good()) << "Unable to open CFG file " << file_name;

  google::protobuf::io::IstreamInputStream pstream(&fstream);
  google::protobuf::io::CodedInputStream cstream(&pstream);
  cstream.SetTotalBytesLimit(512 * 1024 * 1024, -1);
  Module cfg;
  CHECK(cfg.ParseFromCodedStream(&cstream))
      << "Unable to read module from CFG file " << file_name;

  LOG(INFO) << "Lifting program " << cfg.name() << " via CFG protobuf in "
            << file_name;

  auto module = new NativeModule;

  // Bring in the functions, although not their blocks or instructions. This
  // first step enables better cross-reference resolution when we deserialize
  // the instructions.
  module->ea_to_func.reserve(static_cast<size_t>(cfg.funcs_size()));
  for (auto f = 0, max_f = cfg.funcs_size(); f < max_f; ++f) {
    const auto cfg_func = cfg.mutable_funcs(f);
    const auto func_ea = static_cast<uint64_t>(cfg_func->ea());
    if (auto found_func = module->TryGetFunction(func_ea)) {

      // TODO(pag): Add some kind of name->var mapping so that we can
      //            cover this.

      if (!found_func->is_exported && cfg_func->is_entrypoint()) {
        found_func->is_exported = true;

        // Steal the exported name.
        if (cfg_func->has_name()) {
          found_func->name = std::move(*(cfg_func->mutable_name()));
        }
      }

      // This re-declaration has a decl.
      if (!found_func->decl && cfg_func->has_decl()) {
        RecoverFunctionDecl(module, found_func, cfg_func->decl());
      }

      LOG_IF(WARNING, found_func->name != cfg_func->name())
          << "Two or more names for function at " << std::hex << found_func->ea
          << std::dec << ": '" << found_func->name << "' and '"
          << cfg_func->name() << "'";
      continue;
    }

    const auto func = new NativeFunction(module);
    module->functions.emplace_back(func);

    func->ea = func_ea;
    func->lifted_name = LiftedFunctionName(*cfg_func);
    if (cfg_func->has_name()) {
      func->name = std::move(*(cfg_func->mutable_name()));
    }
    func->is_exported = cfg_func->is_entrypoint();

    module->ea_to_func[func->ea] = func;
    LOG(INFO) << "Found function " << func->name << " at " << std::hex
              << func->ea << std::dec;

    if (func->is_exported) {
      CHECK(!func->name.empty())
          << "Exported function at address " << std::hex << func->ea << std::dec
          << " does not have a name";

      LOG(INFO) << "Exported function " << func->name << " at " << std::hex
                << func->ea << std::dec << " is implemented by "
                << func->lifted_name;
    }

    module->AddNameToAddress(func->name, func->ea);

    auto ll_func = gModule->getFunction(func->name);

    // Extract the function decl.
    if (!func->decl && cfg_func->has_decl()) {
      RecoverFunctionDecl(module, func, cfg_func->decl(), ll_func);
    }

    // NOTE(pag): We don't store this in `func->function` as it may get
    //            optimized out, and so we just want to recall enough info
    //            about the function.
    if (ll_func) {

      if (ll_func->hasPrivateLinkage() || ll_func->hasInternalLinkage()) {
        func->is_exported = false;
      }

      auto maybe_decl = anvill::FunctionDecl::Create(*ll_func, gArch);

      if (remill::IsError(maybe_decl)) {
        LOG(ERROR) << remill::GetErrorString(maybe_decl);
      } else {
        maybe_decl->address = func->ea;
        auto maybe_decl_ptr = module->DeclareFunction(*maybe_decl, true);
        if (remill::IsError(maybe_decl_ptr)) {
          LOG(ERROR) << remill::GetErrorString(maybe_decl_ptr);
        } else {
          LOG_IF(WARNING, func->decl != nullptr)
              << "Overwriting CFG-specified function specification for "
              << func->name << " at " << std::hex << func->ea << std::dec
              << " with one based off of existing module function";
          func->decl = *maybe_decl_ptr;
        }
      }
    }
  }

  const bool is_windows = remill::kOSWindows == gArch->os_name;

  std::unordered_map<std::string, const NativeExternalFunction *>
      name_to_extern_func;

  // Bring in the external functions. If an internal function can take its
  // place, then we use it.
  for (auto f = 0, max_f = cfg.external_funcs_size(); f < max_f; ++f) {
    const auto cfg_extern_func = cfg.mutable_external_funcs(f);
    const auto func_ea = static_cast<uint64_t>(cfg_extern_func->ea());

    if (!cfg_extern_func->has_name()) {
      LOG(WARNING) << "Ignoring external function at " << std::hex << func_ea
                   << std::dec << " with no name.";
      continue;
    }

    if (auto found_func = module->TryGetFunction(func_ea)) {
      if (!found_func->is_exported) {
        found_func->is_exported = true;
        found_func->name = std::move(*(cfg_extern_func->mutable_name()));

        LOG(INFO) << "Exported function " << found_func->name << " at "
                  << std::hex << found_func->ea << std::dec
                  << " is implemented by " << found_func->lifted_name;

      } else if (found_func->name != cfg_extern_func->name()) {
        LOG(ERROR) << "Duplicate conflicting names for function at " << std::hex
                   << func_ea << std::dec << ": '" << found_func->name
                   << "' and '" << cfg_extern_func->name() << "'";

      } else {
        LOG(WARNING) << "Ignoring external function '" << found_func->name
                     << "' at " << std::hex << func_ea << std::dec
                     << " that shadows internal function at same address";
      }
      continue;
    }

    const auto func = new NativeExternalFunction(module);
    module->functions.emplace_back(func);

    func->lifted_name = ExternalFuncName(*cfg_extern_func);
    func->name = std::move(*(cfg_extern_func->mutable_name()));
    func->ea = func_ea;
    func->is_external = true;
    func->is_exported = true;
    func->is_weak = cfg_extern_func->is_weak();
    func->num_args = 0;
    func->cc = gArch->DefaultCallingConv();

    module->AddNameToAddress(func->name, func->ea);

    // NOTE(pag): We don't store this in `func->function` as it may get
    //            optimized out, and so we just want to recall enough info
    //            about the function.
    const auto ll_func = gModule->getFunction(func->name);

    LOG(INFO) << "Found external function " << func->name << " at " << std::hex
              << func->ea << std::dec;

    // Extract the function decl. This might declare the extern in the module.
    if (cfg_extern_func->has_decl()) {
      RecoverFunctionDecl(module, func, cfg_extern_func->decl(), ll_func);
    }

    llvm::CallingConv::ID ll_func_cc = llvm::CallingConv::C;

    if (ll_func) {
      func->num_args = static_cast<unsigned>(ll_func->arg_size());

      auto maybe_decl = anvill::FunctionDecl::Create(*ll_func, gArch);
      ll_func_cc = ll_func->getCallingConv();

      if (remill::IsError(maybe_decl)) {
        LOG(ERROR) << remill::GetErrorString(maybe_decl);

      } else {
        maybe_decl->address = func->ea;
        auto maybe_decl_ptr = module->DeclareFunction(*maybe_decl, true);
        if (remill::IsError(maybe_decl_ptr)) {
          LOG(ERROR) << remill::GetErrorString(maybe_decl_ptr);

        } else {
          LOG_IF(WARNING, func->decl != nullptr)
              << "Overwriting CFG-specified function specification for "
              << func->name << " at " << std::hex << func->ea << std::dec
              << " with one based off of existing module function";
          func->decl = *maybe_decl_ptr;
        }
      }

    } else if (cfg_extern_func->has_argument_count()) {
      func->num_args = static_cast<unsigned>(cfg_extern_func->argument_count());
    }

    // Most calling convention stuff is actually only meaningful for 32-bit,
    // x86 code. McSema was originally designed for 32-bit X86, so there needed
    // to be a way to distinguish between the various calling conventions used,
    // and so each function was given a specific one. But then 64-bit support
    // came along and now we're in a bad place where the calling convention is
    // specified, but only in a way that is relevant to 32-bit x86, so we need
    // to ignore it in some places and not others.
    if (cfg_extern_func->has_cc()) {
      if (gArch->IsX86()) {
        switch (cfg_extern_func->cc()) {
          case ExternalFunction_CallingConvention_CalleeCleanup:
            func->cc = llvm::CallingConv::X86_StdCall;
            break;

          case ExternalFunction_CallingConvention_FastCall:
            func->cc = llvm::CallingConv::X86_FastCall;
            break;

          case ExternalFunction_CallingConvention_CallerCleanup:  // cdecl.
            func->cc = llvm::CallingConv::C;
            break;

          default:
            if (is_windows) {
              func->cc = llvm::CallingConv::X86_StdCall;
            }
            break;
        }

      } else if (gArch->IsAMD64()) {
        if (is_windows) {
          func->cc = llvm::CallingConv::Win64;
        } else {
          func->cc = llvm::CallingConv::X86_64_SysV;
        }
      }
    }

    // Override.
    if (ll_func) {
      func->cc = ll_func_cc;
    }

    LOG_IF(WARNING, module->ea_to_var.count(func->ea))
        << "Internal variable at " << std::hex << func->ea
        << " has the same name as the external function " << func->name;

    LOG(INFO) << "Found external function " << func->name << " via " << std::hex
              << func->ea;

    // Check to see if an external function with the same name was already
    // added. This is possible if there are things like thunks calling thunks,
    // or thin wrappers around thunks.
    auto extern_func_it = name_to_extern_func.find(func->name);
    if (extern_func_it != name_to_extern_func.end()) {
      auto dup_func = extern_func_it->second;
      dup_func->ForwardTo(func);

      CHECK_NE(dup_func->ea, func->ea);

      LOG(WARNING) << "External function " << func->name << " at " << std::hex
                   << func->ea << " is also defined at " << std::hex
                   << dup_func->ea;
    }

    module->ea_to_func[func->ea] = func;
  }

  std::unordered_map<std::string, const NativeExternalVariable *>
      name_to_extern_var;

  // Bring in the external variables.
  for (auto v = 0, max_v = cfg.external_vars_size(); v < max_v; ++v) {
    const auto cfg_extern_var = cfg.mutable_external_vars(v);
    const auto var_ea = static_cast<uint64_t>(cfg_extern_var->ea());
    if (!cfg_extern_var->has_name()) {
      LOG(WARNING) << "Ignoring unnamed external variable at " << std::hex
                   << var_ea << std::dec;
      continue;
    }

    if (auto found_func = module->TryGetFunction(var_ea)) {
      if (!found_func->is_exported) {
        found_func->is_exported = true;
        found_func->name = std::move(*(cfg_extern_var->mutable_name()));

        LOG(INFO) << "Exported function " << found_func->name << " at "
                  << std::hex << found_func->ea << std::dec
                  << " is implemented by " << found_func->lifted_name
                  << " using hint from external variable";

      } else if (found_func->name != cfg_extern_var->name()) {
        LOG(ERROR) << "Duplicate conflicting names for function at " << std::hex
                   << var_ea << std::dec << " '" << found_func->name
                   << "' and variable '" << cfg_extern_var->name() << "'";

      } else {
        LOG(WARNING) << "Ignoring external variable '" << found_func->name
                     << "' at " << std::hex << var_ea << std::dec
                     << " that shadows function at same address";
      }
      continue;
    }

    const auto seg = new NativeSegment(module);
    module->segments.emplace_back(seg);

    auto var = new NativeExternalVariable(module);
    module->variables.emplace_back(var);

    auto seg_size = static_cast<uint64_t>(cfg_extern_var->size());
    auto &seg_ptr = module->ea_to_seg[var_ea];
    if (seg_ptr) {
      LOG(ERROR) << "Segment '" << seg_ptr->name << "' at " << std::hex
                 << seg_ptr->ea << " is being redefined as '"
                 << cfg_extern_var->name() << "' at " << var_ea << std::dec;

      if (seg_ptr->as_extern_var) {
        seg_ptr->as_extern_var->ForwardTo(var);
        seg_ptr->as_extern_var = var;
      } else {
        seg_ptr->as_extern_var = var;
      }

      seg_ptr->ForwardTo(seg);
      seg_size = std::max(seg_ptr->size, seg_size);
    }

    seg_ptr = seg;
    seg->ea = var_ea;
    seg->name = cfg_extern_var->name();
    seg->lifted_name = cfg_extern_var->name();
    seg->is_external = true;
    seg->is_exported = false;
    seg->is_thread_local = cfg_extern_var->is_thread_local();
    seg->is_read_only = false;
    seg->size = seg_size;
    seg->as_extern_var = var;

    var->ea = var_ea;
    var->name = std::move(*(cfg_extern_var->mutable_name()));
    var->lifted_name = var->name;
    var->segment = seg;
    var->is_external = true;
    var->is_exported = false;
    var->is_thread_local = cfg_extern_var->is_thread_local();
    var->is_weak = cfg_extern_var->is_weak();
    var->size = seg_size;

    LOG(INFO) << "Found external variable " << var->name << " at " << std::hex
              << var->ea << std::dec;

    // Look for two extern variables with the same name.
    auto extern_var_it = name_to_extern_var.find(var->name);
    if (extern_var_it != name_to_extern_var.end()) {
      auto dup_var = extern_var_it->second;
      MergeVariables(var, dup_var);
      dup_var->ForwardTo(var);

      CHECK_NE(dup_var->ea, var->ea);

      LOG(WARNING) << "External variable " << var->name << " at " << std::hex
                   << var->ea << " is also defined at " << dup_var->ea
                   << std::dec;
    }

    module->ea_to_var[var->ea] = var;
    name_to_extern_var[var->name] = var;
  }

  // Collect variables from within the data sections. We set up the segment
  // information but not their data. We leave that until later when all
  // cross-references have been resolved.
  for (auto s = 0, max_s = cfg.segments_size(); s < max_s; ++s) {
    auto cfg_segment = cfg.mutable_segments(s);

    if (cfg_segment->has_variable_name()) {
      if (name_to_extern_var.count(cfg_segment->variable_name())) {
        LOG(ERROR) << "Skipping segment '" << cfg_segment->variable_name()
                   << " as it's already associated with an extern var";
        ;
        continue;
      }
    }

    auto segment = new NativeSegment(module);
    module->segments.emplace_back(segment);

    segment->ea = static_cast<uint64_t>(cfg_segment->ea());
    segment->size = cfg_segment->data().size();
    segment->lifted_name = LiftedSegmentName(*cfg_segment);

    if (cfg_segment->has_variable_name()) {
      segment->name = std::move(*(cfg_segment->mutable_variable_name()));

    } else if (cfg_segment->has_name()) {
      segment->name = std::move(*(cfg_segment->mutable_name()));
    }

    segment->is_read_only = cfg_segment->read_only();
    segment->is_external = cfg_segment->is_external();
    segment->is_exported = cfg_segment->is_exported();
    segment->is_thread_local = cfg_segment->is_thread_local();

    // Collect the variables.
    for (auto v = 0, max_v = cfg_segment->vars_size(); v < max_v; ++v) {
      auto cfg_var = cfg_segment->mutable_vars(v);
      const auto var_ea = static_cast<uint64_t>(cfg_var->ea());

      if (auto found_var = module->TryGetVariable(var_ea)) {

        // TODO(pag): Add some kind of name->var mapping so that we can
        //            cover this.

        LOG_IF(WARNING, found_var->name != cfg_var->name())
            << "Two or more names for variable at " << std::hex << found_var->ea
            << std::dec << ": '" << found_var->name << "' and '"
            << cfg_var->name() << "'";

        UpdateVariable(found_var, segment);

      } else if (auto found_func = module->TryGetFunction(var_ea)) {
        LOG(WARNING) << "Ignoring variable '" << cfg_var->name() << "' at "
                     << std::hex << var_ea << std::dec
                     << " that shadows function '" << found_func->name << "'";

      } else if (!cfg_var->has_name()) {
        LOG(ERROR) << "Unnamed variable at " << std::hex << cfg_var->ea()
                   << std::dec << " in segment " << segment->name;

      } else {
        auto var = new NativeVariable(module);
        module->variables.emplace_back(var);

        var->ea = var_ea;
        var->lifted_name = LiftedVarName(*cfg_var);
        if (cfg_var->has_name()) {
          var->name = std::move(*(cfg_var->mutable_name()));
        }
        var->segment = segment;
        module->ea_to_var[var->ea] = var;

        // Avoid name conflicts with things like `.bss`.
        if (var->ea == segment->ea) {
          segment->name = var->name;
        }

        LOG(INFO) << "Found variable " << var->name << " at " << std::hex
                  << var->ea << std::dec;
      }
    }

    LOG(INFO) << "Found segment " << segment->name << " [" << std::hex
              << segment->ea << ", " << (segment->ea + segment->size)
              << std::dec << ")";

    auto &seg_ptr = module->ea_to_seg[segment->ea];
    LOG_IF(WARNING, !!seg_ptr)
        << "Segment '" << segment->name << "' overlaps existing segment "
        << "'" << seg_ptr->name << "'";

    seg_ptr = segment;
  }

  // Fill in the cross-reference entries for each segment.
  for (const auto &cfg_segment : cfg.segments()) {
    auto ea = static_cast<uint64_t>(cfg_segment.ea());
    const auto segment = const_cast<NativeSegment *>(module->ea_to_seg[ea]);

    std::map<uint64_t, const NativeXref *> xrefs;
    for (const auto &cfg_xref : cfg_segment.xrefs()) {
      std::unique_ptr<NativeXref> xref(new NativeXref);
      xref->ea = static_cast<uint64_t>(cfg_xref.ea());
      xref->segment = segment;
      xref->width = static_cast<uint32_t>(cfg_xref.width());
      xref->target_ea = static_cast<uint64_t>(cfg_xref.target_ea());
      xref->target_segment = FindSegment(module, xref->target_ea);

      CHECK(xref->width <= pointer_size)
          << "Cross reference at " << std::hex << xref->ea << " to " << std::hex
          << xref->target_ea << " is too wide at " << xref->width << " bytes";

      switch (cfg_xref.target_fixup_kind()) {
        case DataReference_TargetFixupKind_Absolute:
          xref->fixup_kind = NativeXref::kAbsoluteFixup;
          break;
        case DataReference_TargetFixupKind_OffsetFromThreadBase:
          xref->fixup_kind = NativeXref::kThreadLocalOffsetFixup;
          break;
      }

      if (!ResolveReference(module, xref.get())) {
        continue;
      }

      auto &entry = segment->entries[xref->ea];
      entry.ea = xref->ea;
      entry.next_ea = xref->ea + xref->width;
      entry.xref = std::move(xref);
    }
  }

  // Fill in the blob data entries for each segment.
  for (const auto &cfg_segment : cfg.segments()) {
    auto ea = static_cast<uint64_t>(cfg_segment.ea());
    const auto segment = const_cast<NativeSegment *>(module->ea_to_seg[ea]);
    std::vector<NativeSegment::Entry> blobs;

    // Sentinel.
    auto seg_end_ea = ea + segment->size;
    auto &entry = segment->entries[seg_end_ea];
    entry.ea = seg_end_ea;
    entry.next_ea = seg_end_ea;

    anvill::ByteRange range;
    range.address = ea;
    range.is_writeable = !cfg_segment.read_only();
    range.is_executable = true;  // TODO(pag): Fix this.
    range.begin = reinterpret_cast<const uint8_t *>(cfg_segment.data().data());
    range.end = &(range.begin[cfg_segment.data().size()]);
    if (auto err = module->MapRange(range); remill::IsError(err)) {
      LOG(FATAL) << "Unable to map segment " << segment->name << ": "
                 << remill::GetErrorString(err);
    }

    for (const auto &xref_entry : segment->entries) {
      const auto &entry = xref_entry.second;

      // Split this segment's data up into logical components based on the
      // variables indexing into this segment.
      while (ea < entry.ea) {
        if (ea == seg_end_ea) {
          break;
        }

        // Find the beginning of the next variable or entry.
        uint64_t size = 1;
        for (; (ea + size) < entry.ea; ++size) {
          if (module->ea_to_var.count(ea + size)) {
            break;
          }
        }

        constexpr size_t kMaxBlobSize = 4;
        std::unique_ptr<NativeBlob> blob;

        // We ideally want to do a kind of run-length encoding of the blob
        // sections, so that we can avoid explicitly representing large
        // sequences of zeroes. Our approach is to artificially split a section
        // into groups of at most `kMaxBlobSize` bytes, then merge together
        // adjacent blobs that are marked as `is_zero`.
        auto end_ea = ea + static_cast<unsigned>(size);
        for (auto blob_ea = ea; blob_ea < end_ea; blob_ea += kMaxBlobSize) {
          const auto blob_end_ea = std::min(blob_ea + kMaxBlobSize, end_ea);

          // NOTE(pag): The `NativeSegment::Entry` takes ownership of `blob`.
          if (!blob) {
            blob.reset(new NativeBlob);
          }
          blob->ea = blob_ea;
          blob->size = static_cast<unsigned>(blob_end_ea - blob_ea);
          blob->is_zero = true;

          for (auto i = blob_ea; i < blob_end_ea; ++i) {
            if (range.begin[i - range.address]) {
              blob->is_zero = false;
              break;
            }
          }

          // Merge zero blobs with zero blobs, and non-zero blobs with non-zero
          // blobs.
          if (!blobs.empty() && blobs.back().next_ea == blob_ea &&
              blobs.back().blob &&
              blob->is_zero == blobs.back().blob->is_zero) {

            blobs.back().blob->size += blob->size;
            blobs.back().next_ea = blob_end_ea;

          } else {
            blobs.push_back(NativeSegment::Entry{blob_ea, blob_end_ea, nullptr,
                                                 blob.release()});
          }
        }

        ea += size;
      }

      CHECK(ea == entry.ea)
          << "Invalid partitioning of data before " << std::hex << entry.ea;

      if (ea == seg_end_ea) {
        break;
      }

      // Do some sanity checking to see if there are any variables pointing
      // into some actual cross-references. This is more strange than anything.
      for (ea += 1; ea < entry.next_ea; ++ea) {
        auto var_it = module->ea_to_var.find(ea);
        LOG_IF(ERROR, var_it != module->ea_to_var.end())
            << "Variable " << var_it->second->name << " at " << std::hex
            << var_it->second->ea
            << " points into a cross reference, located at " << std::hex
            << entry.xref->ea << " and targeting " << std::hex
            << entry.xref->target_ea;
      }
    }

    segment->entries.erase(seg_end_ea);

    // Add the blobs into the partition.
    for (auto &entry : blobs) {
      segment->entries.emplace(entry.ea, std::move(entry));
    }

    // Verify the partitioning of this segment's data.
    ea = segment->ea;
    unsigned entry_num = 0;
    for (const auto &entry : segment->entries) {
      CHECK(entry.first == ea)
          << "Invalid partitioning of segment " << segment->name << "; entry #"
          << std::dec << entry_num << " EA address " << std::hex << entry.first
          << " does not match "
          << "up with expected entry EA " << std::hex << ea;

      CHECK(entry.second.ea == ea)
          << "Invalid partitioning of segment " << segment->name;

      CHECK(entry.second.next_ea > entry.second.ea)
          << "Invalid partitioning of segment " << segment->name;

      ea = entry.second.next_ea;
      entry_num++;
    }

    CHECK(ea == (segment->ea + segment->size))
        << "Invalid partitioning of segment " << segment->name;
  }

  // Add in each of the function's blocks. At this stage we have all cross-
  // reference information available.
  for (const auto &cfg_func : cfg.funcs()) {
    auto func = module->TryGetFunction(static_cast<uint64_t>(cfg_func.ea()));
    if (!func) {
      continue;
    }

    if (auto func_byte = module->FindByte(func->ea);
        !func_byte.IsExecutable()) {
      LOG(ERROR)
          << "Could not find any executable bytes associated with function "
          << std::hex << func->ea << std::dec;
      continue;
    }

    // Extract the eh_frame entries associated with the function
    for (const auto &entry : cfg_func.eh_frame()) {
      auto frame_var = new NativeExceptionFrame(module);

      frame_var->start_ea = entry.start_ea();
      frame_var->end_ea = entry.end_ea();
      frame_var->lp_ea = entry.lp_ea();
      frame_var->action_index = entry.action();

      // List all the types of the landing pad
      for (const auto &extern_var : entry.ttype()) {
        auto var = new NativeExternalVariable(module);
        module->variables.emplace_back(var);

        // TODO(pag): Initialize `var->segment`.

        var->ea = static_cast<uint64_t>(extern_var.ea());
        var->name = extern_var.name();
        var->is_external = true;
        var->is_exported = true;
        var->is_thread_local = extern_var.is_thread_local();
        var->lifted_name = var->name;
        var->is_weak = extern_var.is_weak();
        var->size = static_cast<uint64_t>(extern_var.size());
        frame_var->type_var[static_cast<uint64_t>(extern_var.size())] = var;
      }

      func->eh_frame.emplace_back(frame_var);
    }

    for (const auto &cfg_block : cfg_func.blocks()) {
      const auto block_ea = static_cast<uint64_t>(cfg_block.ea());
      auto &block = module->ea_to_block[block_ea];
      if (!block) {
        block.reset(new NativeBlock);
        block->ea = block_ea;
      }

      if (cfg_block.is_referenced_by_data()) {
        block->is_referenced_by_data = true;
      }

      // Add in the addresses of the block's successors.
      for (auto succ_ea : cfg_block.successor_eas()) {
        block->successor_eas.push_back(static_cast<uint64_t>(succ_ea));
      }

      // Merge in successors.
      std::sort(block->successor_eas.begin(), block->successor_eas.end());
      auto it =
          std::unique(block->successor_eas.begin(), block->successor_eas.end());
      block->successor_eas.erase(it, block->successor_eas.end());

      func->blocks.push_back(block.get());

      // Add in the block's instructions.
      for (const auto &cfg_inst : cfg_block.instructions()) {

        const auto inst_ea = static_cast<uint64_t>(cfg_inst.ea());

        // Don't add it if we've already got it.
        block->last_inst_ea = std::max(block->last_inst_ea, inst_ea);

        // If there is no possibility of interesting metadata, then we don't
        // actually need a `NativeInstruction`.
        if (!cfg_inst.lp_ea() && !cfg_inst.xrefs_size()) {
          continue;
        }

        auto &inst = module->ea_to_inst[inst_ea];
        if (!inst) {

          NativeInstructionXref *xrefs = nullptr;

          switch (cfg_inst.xrefs_size()) {
            case 5: {
              const auto xinst = new NativeInstructionWithXrefs<5>;
              inst.reset(xinst);
              xrefs = &(xinst->xrefs[0]);
              break;
            }
            case 4: {
              const auto xinst = new NativeInstructionWithXrefs<4>;
              inst.reset(xinst);
              xrefs = &(xinst->xrefs[0]);
              break;
            }
            case 3: {
              const auto xinst = new NativeInstructionWithXrefs<3>;
              inst.reset(xinst);
              xrefs = &(xinst->xrefs[0]);
              break;
            }
            case 2: {
              const auto xinst = new NativeInstructionWithXrefs<2>;
              inst.reset(xinst);
              xrefs = &(xinst->xrefs[0]);
              break;
            }
            case 1: {
              const auto xinst = new NativeInstructionWithXrefs<1>;
              inst.reset(xinst);
              xrefs = &(xinst->xrefs[0]);
              break;
            }
            case 0: {
              inst.reset(new NativeInstruction);
              break;
            }
            default:
              LOG(FATAL)
                  << "Unsupported number of cross-references attached to "
                  << "instruction at " << std::hex << inst_ea << std::dec;
              continue;
          }

          inst->ea = inst_ea;
          inst->lp_ea = static_cast<uint64_t>(cfg_inst.lp_ea());

          for (const auto &cfg_ref : cfg_inst.xrefs()) {
            if (FillXref(module, inst.get(), cfg_ref, pointer_size, xrefs)) {
              ++xrefs;
            }
          }
        }
      }
    }
  }

  auto num_preserved_reg_sets =
      cfg.preserved_regs_size() + cfg.dead_regs_size();
  module->preserved_regs.resize(static_cast<unsigned>(num_preserved_reg_sets));
  auto i = 0;
  for (; i < cfg.preserved_regs_size(); ++i) {
    auto &reg_set = module->preserved_regs[static_cast<unsigned>(i)];
    const auto &cfg_reg_set = cfg.preserved_regs(i);
    for (const auto &reg_name : cfg_reg_set.registers()) {
      reg_set.reg_names.push_back(reg_name);
    }
    for (const auto &cfg_range : cfg_reg_set.ranges()) {
      const auto ea = static_cast<uint64_t>(cfg_range.begin_ea());
      if (cfg_range.has_end_ea()) {
        const auto end_ea = static_cast<uint64_t>(cfg_range.end_ea());
        module->ea_to_range_preserved_regs.emplace(
            ea, std::make_pair(end_ea, &reg_set));
      } else {
        module->ea_to_inst_preserved_regs.emplace(ea, &reg_set);
      }
    }
  }

  for (auto j = 0; i < num_preserved_reg_sets; ++i, ++j) {
    auto &reg_set = module->preserved_regs[static_cast<unsigned>(i)];
    const auto &cfg_reg_set = cfg.dead_regs(j);
    for (const auto &reg_name : cfg_reg_set.registers()) {
      reg_set.reg_names.push_back(reg_name);
    }
    for (const auto &cfg_range : cfg_reg_set.ranges()) {
      const auto ea = static_cast<uint64_t>(cfg_range.begin_ea());
      CHECK(!cfg_range.has_end_ea());
      module->ea_to_inst_killed_regs.emplace(ea, &reg_set);
    }
  }

  auto prune_segments = [=](void) {
    for (auto &seg : module->segments) {
      if (seg.get() != seg->Get()) {
        module->unused_segments.emplace_back(std::move(seg));
      }
    }

    std::sort(module->segments.begin(), module->segments.end(),
              [](const std::unique_ptr<NativeSegment> &a,
                 const std::unique_ptr<NativeSegment> &b) {
                return a.get() > b.get();
              });

    while (!module->segments.empty() && !module->segments.back()) {
      module->segments.pop_back();
    }
  };

  prune_segments();

  if (!FLAGS_disable_adjacent_segment_merging) {

    // Order the segments in increasing order of their starting address, and
    // group external segments (which may overlap internal ones due to
    // external vars) together and internal segments together, so that we
    // don't miss merging opportunities.
    std::sort(module->segments.begin(), module->segments.end(),
              [](const std::unique_ptr<NativeSegment> &a,
                 const std::unique_ptr<NativeSegment> &b) {
                if (a->is_external == b->is_external) {
                  return a->ea < b->ea;
                } else {
                  return a->is_external < b->is_external;
                }
              });

    auto merged = false;

    for (auto i = module->segments.size(); i > 1u;) {
      auto &seg = module->segments[--i];
      auto &prev_seg = module->segments[i - 1u];
      if ((prev_seg->ea + prev_seg->size) == seg->ea &&
          !prev_seg->is_external && !seg->is_external) {
        merged = true;

        LOG(WARNING) << "Merging adjacent segments " << prev_seg->name << " at "
                     << std::hex << prev_seg->ea << " and " << seg->name
                     << " at " << seg->ea << std::dec;

        prev_seg->size += seg->size;
        seg->size = 0;

        prev_seg->entries.insert(std::make_move_iterator(seg->entries.begin()),
                                 std::make_move_iterator(seg->entries.end()));
        seg->entries.clear();

        if (!seg->is_read_only) {
          LOG_IF(WARNING, prev_seg->is_read_only)
              << "Marking read-only segment " << prev_seg->name << " at "
              << std::hex << prev_seg->ea << " as read/write due to merging"
              << std::dec;
          prev_seg->is_read_only = false;
        }

        seg->ForwardTo(prev_seg.get());
      }
    }

    if (merged) {
      prune_segments();
    }
  }

  // Order the segments in increasing order of size.
  std::sort(module->segments.begin(), module->segments.end(),
            [](const std::unique_ptr<NativeSegment> &a,
               const std::unique_ptr<NativeSegment> &b) {
              return a->Get()->size < b->Get()->size;
            });

  for (auto &entry : module->ea_to_seg) {
    entry.second = entry.second->Get();
  }

  for (auto &entry : module->ea_to_func) {
    const auto func = const_cast<NativeFunction *>(entry.second->Get());
    entry.second = func;

    if (!func->is_exported && !func->is_external) {
      continue;
    }

    // Don't export these. They shouldn't be allowed to conflict with any
    // originals if we recompile to sparc/x86.
    if ((func->is_exported && gArch->IsSPARC64() &&
         func->name.find("__sparc_get_pc_thunk") == 0) ||
        (func->is_exported && gArch->IsX86() &&
         func->name.find("__x86.get_pc_thunk") == 0)) {

      func->is_exported = false;
      continue;
    }

    // The ABI-specific declaration that we create from `ll_func` might
    // differ in subtle ways from the actual prototype of `ll_func`. E.g.
    // on 32-bit, returning an `i64` might turn into returning `[2 x i32]`
    // or returning `{i32, i32}`.
    if (auto ll_func = gModule->getFunction(func->name); ll_func) {
      if (ll_func->hasPrivateLinkage() || ll_func->hasInternalLinkage()) {
        func->is_exported = false;
      }

      if (!ll_func->hasNUsesOrMore(1)) {
        LOG(INFO) << "Erasing " << ll_func->getName().str();
        ll_func->eraseFromParent();
      } else {
        LOG(ERROR) << "Renaming existing LLVM function " << func->name
                   << " to avoid future name clashes with external function at "
                   << std::hex << func->ea << std::dec;
        ll_func->setName(func->name + "__original");
      }
    }
  }

  for (auto &entry : module->ea_to_var) {
    auto &var = const_cast<NativeVariable *&>(entry.second);
    var = var->Get();
    var->segment = var->segment->Get();
  }

  for (auto &seg : module->segments) {
    if (!seg->is_external && !FLAGS_merge_segments) {
      seg->padding = seg->ea & 4095u;
    }

    for (auto &entry : seg->entries) {
      if (const auto xref = entry.second.xref.get(); xref) {
        if (xref->segment) {
          xref->segment = xref->segment->Get();
        }
        if (xref->target_segment) {
          xref->target_segment = xref->target_segment->Get();
        }
      }
    }
  }

  return module;
}

// Look up a segment by its lifted name. Walks `segments` in order and, for
// the first one whose `lifted_name` equals `name`, returns the result of
// calling `Get()` on it. Returns `nullptr` when no segment matches.
const NativeSegment *NativeModule::TryGetSegment(llvm::StringRef name) const {
  for (auto seg_it = segments.begin(); seg_it != segments.end(); ++seg_it) {
    const auto &candidate = *seg_it;
    const bool names_match = (name == candidate->lifted_name);
    if (names_match) {
      return candidate->Get();
    }
  }
  return nullptr;
}

// Look up the segment covering address `ea`. Every element of `segments` is
// first resolved through `Get()`; the first resolved segment whose half-open
// range `[seg->ea, seg->ea + seg->size)` contains `ea` is returned. Returns
// `nullptr` when no segment covers the address.
const NativeSegment *NativeModule::TryGetSegment(uint64_t ea) const {
  for (const auto &owned_seg : segments) {
    const auto resolved = owned_seg->Get();
    const auto limit_ea = resolved->ea + resolved->size;

    // Skip anything that starts after `ea` or ends at/before it.
    if (ea < resolved->ea || limit_ea <= ea) {
      continue;
    }
    return resolved;
  }
  return nullptr;
}

// Return the function registered in `ea_to_func` at exactly `ea`, or
// `nullptr` if there is no entry for that address.
const NativeFunction *NativeModule::TryGetFunction(uint64_t ea) const {
  if (const auto found = ea_to_func.find(ea); found != ea_to_func.end()) {
    return found->second;
  }
  return nullptr;
}

// Return the variable registered in `ea_to_var` at exactly `ea`, or
// `nullptr` if there is no entry for that address.
const NativeVariable *NativeModule::TryGetVariable(uint64_t ea) const {
  if (const auto found = ea_to_var.find(ea); found != ea_to_var.end()) {
    return found->second;
  }
  return nullptr;
}

// Try to get the block containing `inst_ea`.
//
// `curr` is an optional hint (may be null) and is tested first. If it does not
// contain `inst_ea`, candidate block start addresses are probed in
// `ea_to_block`, walking backward one byte at a time from `inst_ea`. At most
// 256 addresses are probed, and address zero is never probed (the `block_ea`
// loop guard also prevents the counter from wrapping around).
//
// A block is accepted when `block->ea <= inst_ea <= block->last_inst_ea`.
// Returns `nullptr` if neither the hint nor any probed block contains
// `inst_ea`.
const NativeBlock *NativeModule::TryGetBlock(uint64_t inst_ea,
                                             const NativeBlock *curr) const {

  const auto contains_inst = [inst_ea](const NativeBlock *block) {
    return block && block->ea <= inst_ea && inst_ea <= block->last_inst_ea;
  };

  if (contains_inst(curr)) {
    return curr;
  }

  // NOTE: Each probed block is tested as soon as it is found. Deferring the
  //       test to the following iteration would silently drop the block found
  //       by the final probe (the 256th one, or the one just above address
  //       zero), even though the lookup for it was already performed.
  for (uint64_t block_ea = inst_ea, i = 0u; block_ea && i < 256u;
       block_ea -= 1, ++i) {
    const auto block_it = ea_to_block.find(block_ea);
    if (block_it == ea_to_block.end()) {
      continue;
    }

    const NativeBlock *const candidate = block_it->second.get();
    if (contains_inst(candidate)) {
      return candidate;
    }
  }

  return nullptr;
}

// Try to get the instruction at `ea`. Returns the raw pointer held by the
// `ea_to_inst` entry for exactly that address, or `nullptr` if there is no
// such entry.
const NativeInstruction *NativeModule::TryGetInstruction(uint64_t ea) const {
  if (const auto found = ea_to_inst.find(ea); found != ea_to_inst.end()) {
    return found->second.get();
  }
  return nullptr;
}

// Mutable overload: performs the lookup through the `const` overload taking
// an address, then strips the `const` qualifier from the result.
NativeSegment *NativeModule::TryGetSegment(uint64_t ea) {
  const NativeModule &self = *this;
  const NativeSegment *const found = self.TryGetSegment(ea);
  return const_cast<NativeSegment *>(found);
}

// Mutable overload: performs the lookup through the `const` overload, then
// strips the `const` qualifier from the result.
NativeFunction *NativeModule::TryGetFunction(uint64_t ea) {
  const NativeModule &self = *this;
  const NativeFunction *const found = self.TryGetFunction(ea);
  return const_cast<NativeFunction *>(found);
}

// Mutable overload: performs the lookup through the `const` overload, then
// strips the `const` qualifier from the result.
NativeVariable *NativeModule::TryGetVariable(uint64_t ea) {
  const NativeModule &self = *this;
  const NativeVariable *const found = self.TryGetVariable(ea);
  return const_cast<NativeVariable *>(found);
}

}  // namespace mcsema
